library(curl)
## Using libcurl 7.88.1 with LibreSSL/3.3.6
library(car)
## Loading required package: carData
f <- curl("https://raw.githubusercontent.com/fuzzyatelin/fuzzyatelin.github.io/master/AN588_Fall23/zombies.csv")
d <- read.csv(f, header = TRUE, sep = ",", stringsAsFactors = FALSE)
Analysis of Variance and ANOVA Tables
m <- lm(data = d, height ~ weight)
SSY <- sum((m$model$height - mean(m$model$height))^2) # height - mean(height)
SSY
## [1] 18558.61
SSR <- sum((m$fitted.values - mean(m$model$height))^2) # predicted height - mean height
SSR
## [1] 12864.82
SSE <- sum((m$model$height - m$fitted.values)^2) # height - predicted height
SSE
## [1] 5693.785
df_regression <- 1
df_error <- 998
df_y <- 999
MSR <- SSR/df_regression
MSE <- SSE/df_error
MSY <- SSY/df_y
fratio <- MSR/MSE
fratio
## [1] 2254.931
Evaluate F ratio test statistic against F distribution
curve(df(x, df = 1, df2 = 1), col = "green", lty = 3, lwd = 2, xlim = c(0, 10),
main = "Some Example F Distributions\n(vertical line shows critical value for df1=1,df2=998)",
ylab = "f(x)", xlab = "x")
curve(df(x, df = 2, df2 = 2), col = "blue", lty = 3, lwd = 2, add = TRUE)
curve(df(x, df = 4, df2 = 4), col = "red", lty = 3, lwd = 2, add = TRUE)
curve(df(x, df = 8, df2 = 6), col = "purple", lty = 3, lwd = 2, add = TRUE)
curve(df(x, df = 1, df2 = 998), col = "black", lwd = 3, add = TRUE)
legend("top", c("df1=1,df2=1", "df1=2,df2=2", "df1=4,df2=4", "df1=8,df2=6",
"df1=1,df2=998"), lty = 3, lwd = 2, col = c("green", "blue", "red", "purple",
"black"), bty = "n", cex = 0.75)
fcrit <- qf(p = 0.95, df1 = 1, df2 = 998)
fcrit
## [1] 3.850793
abline(v = fcrit)
abline(h = 0)
polygon(cbind(c(fcrit, seq(from = fcrit, to = 10, length.out = 1000), 8), c(0,
df(seq(from = fcrit, to = 8, length.out = 1000), df1 = 1, df2 = 998), 0)),
border = "black", col = "grey")
1 - pf(q = fratio, df1 = 1, df2 = 998)
## [1] 0
a <- aov(data = d, height ~ weight)
summary(a)
## Df Sum Sq Mean Sq F value Pr(>F)
## weight 1 12865 12865 2255 <2e-16 ***
## Residuals 998 5694 6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary.aov(m)
## Df Sum Sq Mean Sq F value Pr(>F)
## weight 1 12865 12865 2255 <2e-16 ***
## Residuals 998 5694 6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
rsquared <- SSR/SSY
rsquared
## [1] 0.6931998
rho <- sqrt(rsquared)
rho
## [1] 0.8325862
Standard Errors of Regression Coefficients
SSX <- sum((m$model$weight - mean(m$model$weight))^2)
SEbeta1 <- sqrt(MSE/SSX)
SEbeta1
## [1] 0.004106858
SEbeta0 <- sqrt((MSE * sum(m$model$weight^2))/(1000 * SSX))
SEbeta0
## [1] 0.5958147
SEyhat <- sqrt(MSE * (1/1000 + (m$model$weight - mean(m$model$weight))^2/SSX))
head(SEyhat)
## [1] 0.08978724 0.07620966 0.08414480 0.09533986 0.08904151 0.08341218
summary(m)
##
## Call:
## lm(formula = height ~ weight, data = d)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.1519 -1.5206 -0.0535 1.5167 9.4439
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.565446 0.595815 66.41 <2e-16 ***
## weight 0.195019 0.004107 47.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.389 on 998 degrees of freedom
## Multiple R-squared: 0.6932, Adjusted R-squared: 0.6929
## F-statistic: 2255 on 1 and 998 DF, p-value: < 2.2e-16
Calculate the residuals from the regression of zombie height on weight and plot these in relation to weight (the x variable).
m <- lm(data = d, height ~ weight)
plot(x = d$weight, y = m$residuals)
# or
e <- resid(m)
plot(x = d$weight, y = e)
Now, plot a histogram of your residuals… ideally they are normally distributed!
hist(e, xlim = c(-4 * sd(e), 4 * sd(e)), breaks = 20, main = "Histogram of Residuals")
plot(m$model$weight, m$residuals)
par(mfrow = c(2, 2))
plot(m)
qqnorm(m$residuals)
qqPlot(m$residuals)
## [1] 954 799
Tests for normality
s <- shapiro.test(m$residuals)
s
##
## Shapiro-Wilk normality test
##
## data: m$residuals
## W = 0.99713, p-value = 0.07041
f <- curl("https://raw.githubusercontent.com/fuzzyatelin/fuzzyatelin.github.io/master/AN588_Fall23/KamilarAndCooperData.csv")
d <- read.csv(f, header = TRUE, sep = ",", stringsAsFactors = FALSE)
head(d)
## Scientific_Name Family Genus Species
## 1 Allenopithecus_nigroviridis Cercopithecidae Allenopithecus nigroviridis
## 2 Allocebus_trichotis Cercopithecidae Allocebus trichotis
## 3 Alouatta_belzebul Atelidae Alouatta belzebul
## 4 Alouatta_caraya Atelidae Alouatta caraya
## 5 Alouatta_guariba Atelidae Alouatta guariba
## 6 Alouatta_palliata Atelidae Alouatta palliata
## Brain_Size_Species_Mean Brain_Size_Female_Mean Brain_size_Ref
## 1 58.02 53.70 Isler et al 2008
## 2 NA NA
## 3 52.84 51.19 Isler et al 2008
## 4 52.63 47.80 Isler et al 2008
## 5 51.70 49.08 Isler et al 2008
## 6 49.88 48.04 Isler et al 2008
## Body_mass_male_mean Body_mass_female_mean Mass_Dimorphism
## 1 6130 3180 1.928
## 2 92 84 1.095
## 3 7270 5520 1.317
## 4 6525 4240 1.539
## 5 5800 4550 1.275
## 6 7150 5350 1.336
## Mass_Ref MeanGroupSize AdultMales AdultFemale AdultSexRatio
## 1 Isler et al 2008 NA NA NA NA
## 2 Smith and Jungers 1997 1.00 1.00 1.0 NA
## 3 Isler et al 2008 7.00 1.00 1.0 1.00
## 4 Isler et al 2008 8.00 2.30 3.3 1.43
## 5 Isler et al 2008 6.53 1.37 2.2 1.61
## 6 Isler et al 2008 12.00 2.90 6.3 2.17
## Social_Organization_Ref
## 1
## 2 Kappeler 1997
## 3 Campbell et al 2007
## 4 van Schaik et al. 1999; Kappeler and Pereira 2003; Nunn & van Schaik 2000
## 5 Campbell et al 2007
## 6 van Schaik et al. 1999; Kappeler and Pereira 2003; Nunn & van Schaik 2000
## InterbirthInterval_d Gestation WeaningAge_d MaxLongevity_m LitterSz
## 1 NA NA 106.15 276.0 1.01
## 2 NA NA NA NA 1.00
## 3 NA NA NA NA NA
## 4 337.62 187 323.16 243.6 1.01
## 5 NA NA NA NA NA
## 6 684.37 186 495.60 300.0 1.02
## Life_History_Ref GR_MidRangeLat_dd Precip_Mean_mm Temp_Mean_degC AET_Mean_mm
## 1 Jones et al. 2009 -0.17 1574.0 25.2 1517.8
## 2 -16.59 1902.3 20.3 1388.2
## 3 -6.80 1643.5 24.9 1286.6
## 4 Jones et al. 2009 -20.34 1166.4 22.9 1193.1
## 5 -21.13 1332.3 19.6 1225.7
## 6 Jones et al. 2009 6.95 1852.6 23.7 1300.0
## PET_Mean_mm Climate_Ref HomeRange_km2 HomeRangeRef DayLength_km
## 1 1589.4 Jones et al. 2009 NA NA
## 2 1653.7 Jones et al. 2009 NA NA
## 3 1549.8 Jones et al. 2009 NA NA
## 4 1404.9 Jones et al. 2009 NA 0.40
## 5 1332.2 Jones et al. 2009 0.03 Jones et al. 2009 NA
## 6 1633.9 Jones et al. 2009 0.19 Jones et al. 2009 0.32
## DayLengthRef Territoriality Fruit Leaves Fauna DietRef1
## 1 NA NA
## 2 NA NA
## 3 NA 57.3 19.1 0.0 Campbell et al. 2007
## 4 Nunn et al. 2003 NA 23.8 67.7 0.0 Campbell et al. 2007
## 5 NA 5.2 73.0 0.0 Campbell et al. 2007
## 6 Nunn et al. 2003 0.6506 33.1 56.4 0.0 Campbell et al. 2007
## Canine_Dimorphism Canine_Dimorphism_Ref Feed Move Rest Social
## 1 2.210 Plavcan & Ruff 2008 NA NA NA NA
## 2 NA NA NA NA NA
## 3 1.811 Plavcan & Ruff 2008 13.75 18.75 57.30 10.00
## 4 1.542 Plavcan & Ruff 2008 15.90 17.60 61.60 4.90
## 5 1.783 Plavcan & Ruff 2008 18.33 14.33 64.37 3.00
## 6 1.703 Plavcan & Ruff 2008 17.94 12.32 66.14 3.64
## Activity_Budget_Ref
## 1
## 2
## 3 Campbell et al. 2007
## 4 Campbell et al. 2007
## 5 Campbell et al. 2007
## 6 Campbell et al. 2007
plot(data = d, WeaningAge_d ~ Body_mass_female_mean)
model <- lm(data = d, WeaningAge_d ~ Body_mass_female_mean)
summary(model)
##
## Call:
## lm(formula = WeaningAge_d ~ Body_mass_female_mean, data = d)
##
## Residuals:
## Min 1Q Median 3Q Max
## -720.39 -115.48 -54.05 58.11 471.22
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.016e+02 2.012e+01 10.02 <2e-16 ***
## Body_mass_female_mean 2.013e-02 1.927e-03 10.44 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 183.4 on 114 degrees of freedom
## (97 observations deleted due to missingness)
## Multiple R-squared: 0.489, Adjusted R-squared: 0.4845
## F-statistic: 109.1 on 1 and 114 DF, p-value: < 2.2e-16
plot(model)
qqPlot(model$residuals)
## 97 44
## 48 24
# Test for normality
s <- shapiro.test(model$residuals)
s
##
## Shapiro-Wilk normality test
##
## data: model$residuals
## W = 0.86291, p-value = 5.825e-09
Data Transformations
d$logWeaningAge <- log(d$WeaningAge_d)
d$logFemaleBodyMass <- log(d$Body_mass_female_mean)
plot(data = d, logWeaningAge ~ logFemaleBodyMass)
model <- lm(data = d, logWeaningAge ~ logFemaleBodyMass)
summary(model)
##
## Call:
## lm(formula = logWeaningAge ~ logFemaleBodyMass, data = d)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.10639 -0.32736 0.00848 0.32214 1.11010
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.7590 0.2196 8.011 1.08e-12 ***
## logFemaleBodyMass 0.4721 0.0278 16.983 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4532 on 114 degrees of freedom
## (97 observations deleted due to missingness)
## Multiple R-squared: 0.7167, Adjusted R-squared: 0.7142
## F-statistic: 288.4 on 1 and 114 DF, p-value: < 2.2e-16
qqPlot(model$residuals)
## 44 213
## 24 116
s <- shapiro.test(model$residuals)
s
##
## Shapiro-Wilk normality test
##
## data: model$residuals
## W = 0.99367, p-value = 0.8793
par(mfrow = c(1, 2))
a <- 2
b <- 2
# log x
x <- seq(from = 0, to = 100, length.out = 1000)
y <- a + b * log(x)
plot(x, y, type = "l", main = "untransformed")
plot(log(x), y, type = "l", main = "log(x)")
# log y
x <- seq(from = 0, to = 10, length.out = 1000)
y <- exp(a + b * x)
plot(x, y, type = "l", main = "untransformed")
plot(x, log(y), type = "l", main = "log(y)")
# assymptotic
x <- seq(from = 1, to = 100, length.out = 100)
y <- (a * x)/(1 + b * x)
plot(x, y, type = "l", main = "untransformed")
plot(1/x, y, type = "l", main = "1/x")
# reciprocal
x <- seq(from = 1, to = 100, length.out = 100)
y <- a + b/x
plot(x, y, type = "l", main = "untransformed")
plot(1/x, y, type = "l", main = "1/x")
# power
x <- seq(from = 1, to = 100, length.out = 100)
y <- a * x^b
plot(x, y, type = "l", main = "untransformed")
plot(x^b, y, type = "l", main = "x^b")
# exp
x <- seq(from = 1, to = 10, length.out = 100)
y <- a * exp(b * x)
plot(x, y, type = "l", main = "untransformed")
plot(x, log(y), type = "l", main = "log(y)")